import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument is a directory: it is appended to the module
# search path here, before the project-local import further down, and is also
# used later as the folder containing 'credit_customers.csv'.
# NOTE(review): presumably this is where decision_company lives - confirm.
# Running the script without that argument raises IndexError on this line.
sys.path.append(sys.argv[1])

import pandas as pd  
import pickle
from decision_company import read_csv_file, fetch_column, calculate_quantile, create_condition, logical_and, filter_by_condition, convert_to_tuples
  
# Read the customer table from the directory given as the first CLI argument.
dataset_path = os.path.join(sys.argv[1], 'credit_customers.csv')
credit_customers = read_csv_file(dataset_path)

# Credit amount: the column, its 0.75 quantile, and the condition built from both.
# NOTE(review): create_condition is presumably a "greater than" comparison
# against the threshold - confirm against decision_company.
credit_amount_column = fetch_column(credit_customers, 'credit_amount')
credit_amount_75th = calculate_quantile(credit_amount_column, 0.75)
credit_amount_condition = create_condition(credit_amount_column, credit_amount_75th)

# Loan duration: the same three steps.
duration_column = fetch_column(credit_customers, 'duration')
duration_75th = calculate_quantile(duration_column, 0.75)
duration_condition = create_condition(duration_column, duration_75th)

# Keep only the rows that satisfy both conditions; later sections reuse this frame.
combined_condition = logical_and(credit_amount_condition, duration_condition)
high_credit_long_duration = filter_by_condition(credit_customers, combined_condition)

# Only the credit amount and duration columns are selected explicitly.
# NOTE(review): whether the tuples also carry the row index (client ID)
# depends on convert_to_tuples - confirm.
result = fetch_column(high_credit_long_duration, ['credit_amount', 'duration'])
result_list = list(convert_to_tuples(result))

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_1.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, filter_by_value, fetch_index, convert_to_list

# Nothing is reloaded here: this section works on high_credit_long_duration,
# the filtered frame produced earlier in the script.

# Rows whose 'credit_history' value is 'delayed previously'.
late_payments = filter_by_value(
    high_credit_long_duration,
    'credit_history',
    'delayed previously',
)

# Take the index of the matching rows (used as the client IDs) and turn it
# into a plain list for printing.
result = fetch_index(late_payments)
result_list = convert_to_list(result)

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_2.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, calculate_median, filter_by_condition, create_condition, logical_and, fetch_index, convert_to_list, fetch_column

# Nothing is reloaded here: this section filters high_credit_long_duration,
# the frame produced earlier in the script.

# Median installment commitment, computed over the full customer table.
installment_commitment_median = calculate_median(credit_customers, 'installment_commitment')

# Both conditions are built from high_credit_long_duration, i.e. from the same
# frame that is filtered below, so the combined mask lines up with it row for
# row. Building one of them from credit_customers would produce a mask with a
# different length/index than the frame being filtered.
# NOTE(review): create_condition is presumably a "greater than" comparison,
# i.e. more than 1 existing credit and a commitment above the median - confirm.
existing_credits_condition = create_condition(
    fetch_column(high_credit_long_duration, 'existing_credits'),
    1,
)
installment_commitment_condition = create_condition(
    fetch_column(high_credit_long_duration, 'installment_commitment'),
    installment_commitment_median,
)
multiple_credits_high_commitment = filter_by_condition(
    high_credit_long_duration,
    logical_and(existing_credits_condition, installment_commitment_condition),
)

# Take the index of the matching rows (used as the client IDs).
result = fetch_index(multiple_credits_high_commitment)

# Convert the index to a plain list for printing.
result_list = convert_to_list(result)

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_3.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, fetch_column, logical_and, filter_by_condition, count_rows

# Nothing is reloaded here: this section works on high_credit_long_duration,
# the filtered frame produced earlier in the script.

# Inclusive age bounds: 25 <= age <= 55.
age_column = fetch_column(high_credit_long_duration, 'age')
condition1 = age_column >= 25
condition2 = age_column <= 55

# A row qualifies only when both bounds hold.
final_condition = logical_and(condition1, condition2)
clients_25_to_55 = filter_by_condition(high_credit_long_duration, final_condition)

# Number of rows left after the age filter.
result_count = count_rows(clients_25_to_55)

print(result_count)
# pickle.dump(result_count,open("./ref_result/result_count_1.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, logical_or, filter_by_condition, fetch_index, convert_to_list, fetch_column

# Nothing is reloaded here: this section works on high_credit_long_duration,
# the filtered frame produced earlier in the script.

# "Stable employment" means one of the two longest employment bands.
employment_values = fetch_column(high_credit_long_duration, 'employment')
condition1 = employment_values == '4<=X<7'
condition2 = employment_values == '>=7'

# A row qualifies when it falls in either band.
final_condition = logical_or(condition1, condition2)
stable_employment = filter_by_condition(high_credit_long_duration, final_condition)

# Take the index of the matching rows (used as the client IDs) and turn it
# into a plain list for printing.
result = fetch_index(stable_employment)
result_list = convert_to_list(result)

print(result_list)
# pickle.dump(result_list,open("./ref_result/result_list_4.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, fetch_column, logical_or, filter_by_condition, count_rows

# Nothing is reloaded here: this section narrows stable_employment, the frame
# produced by the previous section (not the wider high_credit_long_duration).

# Housing is either rented or owned.
housing_values = fetch_column(stable_employment, 'housing')
condition1 = housing_values == 'rent'
condition2 = housing_values == 'own'

# A row qualifies when either housing value matches.
final_condition = logical_or(condition1, condition2)
rented_owned_housing = filter_by_condition(stable_employment, final_condition)

# Number of rows left after the housing filter.
result_count = count_rows(rented_owned_housing)

print(result_count)
# pickle.dump(result_count,open("./ref_result/result_count_2.pkl","wb"))

import pandas as pd  
import pickle 
from decision_company import read_csv_file, fetch_column, avg, locate_mode, visit_by_index

# Nothing is reloaded here: the summary below is computed over
# stable_employment, the frame produced two sections earlier.

# Averages of credit amount and loan duration.
credit_amount_column = fetch_column(stable_employment, 'credit_amount')
average_credit_amount = avg(credit_amount_column)

duration_column = fetch_column(stable_employment, 'duration')
average_loan_duration = avg(duration_column)

# Most frequent employment value: element 0 of whatever locate_mode returns.
# NOTE(review): locate_mode presumably returns every tied mode, in which case
# only the first one is reported - confirm.
employment_column = fetch_column(stable_employment, 'employment')
employment_modes = locate_mode(employment_column)
most_common_employment = visit_by_index(employment_modes, 0)

# Print the summary of common characteristics
print("Average credit amount:", average_credit_amount)
# pickle.dump(average_credit_amount,open("./ref_result/average_credit_amount.pkl","wb"))
print("Average loan duration:", average_loan_duration)
# pickle.dump(average_loan_duration,open("./ref_result/average_loan_duration.pkl","wb"))
print("Most common employment status:", most_common_employment)
# pickle.dump(most_common_employment,open("./ref_result/most_common_employment.pkl","wb"))


